In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Make every figure in this notebook 10 x 10 inches by default.
plt.rcParams.update({"figure.figsize": (10, 10)})
In [2]:
import random
import matplotlib.patches as mpatches
In [3]:
# Load the raw trip table; the path is relative to the notebook's directory.
taxi_csv_path = '../data/taxi_short_2.csv'
taxi_full_pd = pd.read_csv(taxi_csv_path)
In [4]:
# Study window: a box centred on (center_lat, center_lng) that extends
# dlat / dlng degrees on either side of the centre.
center_lat, center_lng = 40.76, -73.925
dlat, dlng = 0.1, 0.1

# Outer edges of the window, reused for filtering and for building bin edges.
min_lat, max_lat = center_lat - dlat, center_lat + dlat
min_lng, max_lng = center_lng - dlng, center_lng + dlng
In [5]:
# Build the working table by keeping only rows that pass every check below.

# Trip sanity: distance in (0.1, 50] and a duration above 1 second.
trip_ok = (
    (taxi_full_pd.trip_distance > 0.1)
    & (taxi_full_pd.trip_distance <= 50)
    & (taxi_full_pd.trip_time_in_secs > 1)
)

# Fare sanity: below 50 and a whole multiple of 0.5 (doubling it leaves no
# fractional part).
fare_ok = (
    (taxi_full_pd.fare_amount < 50)
    & ((taxi_full_pd.fare_amount * 2) % 1 == 0)
)

# Pickup point strictly inside the study window. Only the pickup coordinates
# are constrained here; dropoff coordinates are left unfiltered.
pickup_in_window = (
    (taxi_full_pd.pickup_latitude > min_lat)
    & (taxi_full_pd.pickup_latitude < max_lat)
    & (taxi_full_pd.pickup_longitude > min_lng)
    & (taxi_full_pd.pickup_longitude < max_lng)
)

taxi_pd = pd.DataFrame(taxi_full_pd[trip_ok & fare_ok & pickup_in_window])

# The unfiltered table and the temporary masks are no longer needed; release them.
# NOTE: because taxi_full_pd is deleted, re-running this cell requires
# re-running the load cell first.
del taxi_full_pd, trip_ok, fare_ok, pickup_in_window
In [8]:
# Remove the two leading columns, which carry no useful data.
# NOTE: this mutates taxi_pd in place, so every re-run of this cell strips
# the first two of whatever columns remain.
leading_cols = taxi_pd.columns[:2]
taxi_pd.drop(leading_cols, axis=1, inplace=True)
taxi_pd.head()
Out[8]:
In [9]:
# Grid resolution: number of bins along each axis of the study window.
num_lat_bins = num_lng_bins = 40

# n equal-width bins need n + 1 edges spanning [min, max].
lat_bins = np.linspace(min_lat, max_lat, num=num_lat_bins + 1)
lng_bins = np.linspace(min_lng, max_lng, num=num_lng_bins + 1)
In [10]:
# Sanity check: show the first five latitude and longitude bin edges.
# The parenthesised form of print runs on both Python 2 and Python 3.
print(lat_bins[:5])
print(lng_bins[:5])
In [18]:
# Assign every pickup latitude to one of the latitude bins defined by lat_bins.
pick_clat = pd.cut(taxi_pd.pickup_latitude.values, lat_bins)
# The parenthesised form of print runs on both Python 2 and Python 3.
print(pick_clat)
In [21]:
# First five binned pickup latitudes (print(...) works on Python 2 and 3).
print(pick_clat[:5])
In [22]:
# Integer bin codes of the first five pickups (print(...) works on Python 2 and 3).
print(pick_clat.codes[:5])
In [25]:
# Integer bin codes of the first fifty pickups (print(...) works on Python 2 and 3).
print(pick_clat.codes[:50])
In [26]:
# Bin the remaining coordinate columns with the same edges used for pickup
# latitude: longitudes against lng_bins, latitudes against lat_bins.
# NOTE(review): taxi_pd was filtered on pickup coordinates only, so dropoff
# points may fall outside the bin edges - confirm how those should be handled.
pick_clng, drop_clat, drop_clng = (
    pd.cut(taxi_pd[coord_col].values, bin_edges)
    for coord_col, bin_edges in (
        ('pickup_longitude', lng_bins),
        ('dropoff_latitude', lat_bins),
        ('dropoff_longitude', lng_bins),
    )
)
In [31]:
# Pickup and dropoff longitudes as Series; pick_s supplies the values that
# are counted per grid cell in the groupby cells that follow.
pick_s = pd.Series(taxi_pd.pickup_longitude)
drop_s = pd.Series(taxi_pd.dropoff_longitude)
# The parenthesised form of print runs on both Python 2 and Python 3.
print(pick_s[:5])
In [35]:
# Count pickups per (latitude bin, longitude bin) pair, keyed by bin codes.
pickup_bin_keys = [pick_clat.codes, pick_clng.codes]
pick_s.groupby(pickup_bin_keys).count()
Out[35]:
In [11]:
# Full index of every (lat bin, lng bin) pair in the grid. Reindexing the
# per-bin counts against it makes bins with no observations show up too.
lat_lng_mi = pd.MultiIndex.from_product(
    [range(num_lat_bins), range(num_lng_bins)],
    names=['lat', 'lng'])
# The parenthesised form of print runs on both Python 2 and Python 3.
print(lat_lng_mi)
# MultiIndex.labels was renamed to MultiIndex.codes and later removed from
# pandas; prefer .codes and fall back to .labels only on old releases.
print(lat_lng_mi.codes if hasattr(lat_lng_mi, 'codes') else lat_lng_mi.labels)
In [27]:
# Number of stops in each lat/lng bin that has at least one observation.
# NOTE(review): only pickups are counted here (pick_s grouped by the pickup
# bin codes); dropoffs are not included - confirm whether that is intended.
pickup_bin_codes = [pick_clat.codes, pick_clng.codes]
stop_binned_cnts = pick_s.groupby(pickup_bin_codes).count()
In [36]:
# Preview: per-bin pickup counts spread over the full grid index, with bins
# that have no pickups filled in as 0.
pickup_counts_sparse = pick_s.groupby([pick_clat.codes, pick_clng.codes]).count()
pickup_counts_sparse.reindex(lat_lng_mi).fillna(0)
Out[36]:
In [37]:
# Align the sparse counts to the full grid index; bins missing from
# stop_binned_cnts come back as NaN and are then set to 0.
counts_on_full_grid = stop_binned_cnts.reindex(lat_lng_mi)
stop_binned_cnts_ri = counts_on_full_grid.fillna(0)
In [38]:
# Raw count values of the first five grid cells, without the index.
stop_binned_cnts_ri.head(5).values
Out[38]:
In [40]:
# First five grid cells together with their (lat, lng) bin index.
stop_binned_cnts_ri.head(5)
Out[40]:
In [ ]: